home *** CD-ROM | disk | FTP | other *** search
- package regex;
-
- import java.util.Hashtable;
-
- class RegExpParser {
- private static final int TK_END = 0;
- private static final int TK_CHAR = 1;
- private static final int TK_CHARCLASS = 2;
- private static final int TK_UNION = 10;
- private static final int TK_LPAR = 11;
- private static final int TK_RPAR = 12;
- private static final int TK_CLOSURE = 13;
- private static final int TK_PLUS = 14;
- private static final int TK_QUESTION = 15;
- private static final int TK_ANYCHAR = 16;
- private static final int TK_LBRA = 17;
- private static final int TK_LBRANEG = 18;
- private static final int TK_RBRA = 19;
- private static final int TK_LHEAD = 20;
- private static final int TK_LTAIL = 21;
- private static final char CHAR_VL = '|';
- private static final char CHAR_LPAR = '(';
- private static final char CHAR_RPAR = ')';
- private static final char CHAR_ASTERISK = '*';
- private static final char CHAR_PLUS = '+';
- private static final char CHAR_QUESTION = '?';
- private static final char CHAR_DOT = '.';
- private static final char CHAR_LBRA = '[';
- private static final char CHAR_RBRA = ']';
- private static final char CHAR_CARET = '^';
- private static final char CHAR_DOLLAR = '$';
- private static final char CHAR_MINUS = '-';
- private static final char CHAR_BKSLASH = '\\';
- private static final String MSG_INVALID_ESCAPE = "Invalid escape character.";
- private static final String MSG_INVALID_CHARRANGE = "Invalid character range.";
- private static final String MSG_RBRA_EXPECTED = "\"]\" is expected.";
- private static final String MSG_RPAR_EXPECTED = "\")\" is expected.";
- private static final String MSG_CHAR_EXPECTED = "Normal character is expected.";
- private int currentToken;
- private Chars tokenChars;
- private String strbuff = "";
- private int pstr = 0;
- private boolean inCharClass = false;
- private static Hashtable escapeTable = new Hashtable();
-
- private int getTokenCC() throws RegExpSyntaxException {
- if (this.pstr == this.strbuff.length()) {
- throw new RegExpSyntaxException("\"]\" is expected.");
- } else {
- char var1 = this.strbuff.charAt(this.pstr++);
- if (var1 == ']') {
- this.currentToken = 19;
- } else {
- if (var1 == '\\') {
- if (this.pstr == this.strbuff.length()) {
- throw new RegExpSyntaxException("Illegal escape sequense.");
- }
-
- var1 = this.strbuff.charAt(this.pstr++);
- }
-
- this.currentToken = 1;
- char var2 = this.strbuff.charAt(this.pstr++);
- if (var2 == '-') {
- if (this.pstr == this.strbuff.length()) {
- throw new RegExpSyntaxException("\"]\" is expected.");
- }
-
- char var3 = this.strbuff.charAt(this.pstr++);
- if (var3 == ']') {
- this.pstr -= 2;
- this.tokenChars = new Chars(var1);
- } else {
- if (var3 == '\\') {
- if (this.pstr == this.strbuff.length()) {
- throw new RegExpSyntaxException("Invalid escape character.");
- }
-
- var3 = this.strbuff.charAt(this.pstr++);
- }
-
- this.currentToken = 2;
- if (var1 > var3) {
- throw new RegExpSyntaxException("Invalid character range.");
- }
-
- this.tokenChars = new Chars(var1, var3);
- }
- } else {
- this.pstr += -1;
- this.tokenChars = new Chars(var1);
- }
- }
-
- return this.currentToken;
- }
- }
-
- private RTree term() throws RegExpSyntaxException {
- RTree var1;
- if (this.currentToken != 10 && this.currentToken != 12 && this.currentToken != 0) {
- for(var1 = this.factor(); this.currentToken != 10 && this.currentToken != 12 && this.currentToken != 0; var1 = new RTree(2, var1, this.factor())) {
- }
- } else {
- var1 = new RTree(0, (RTree)null, (RTree)null);
- }
-
- return var1;
- }
-
- private RTree charClass() throws RegExpSyntaxException {
- this.nextToken();
- if (this.currentToken == 19) {
- throw new RegExpSyntaxException("Invalid character class.");
- } else {
- RTree var1 = new RTree(this.tokenChars);
- this.nextToken();
-
- while(this.currentToken != 19) {
- var1 = new RTree(3, var1, new RTree(this.tokenChars));
- this.nextToken();
- }
-
- return var1;
- }
- }
-
- private void initialize(String var1) throws RegExpSyntaxException {
- this.strbuff = this.processEscape(var1);
- this.pstr = 0;
- this.nextToken();
- }
-
- private RTree negativeCharClass() throws RegExpSyntaxException {
- this.nextToken();
- if (this.currentToken == 19) {
- throw new RegExpSyntaxException("Invalid character class.");
- } else {
- RTree var1 = new RTree(new Chars('\u0000', '\uffff'));
-
- while(this.currentToken != 19) {
- var1.removeChars(this.tokenChars);
- this.nextToken();
- }
-
- return var1;
- }
- }
-
- public RTree parse(String var1) throws RegExpSyntaxException {
- this.initialize(var1);
- RTree var2 = this.regexp();
- if (this.currentToken != 0) {
- throw new RegExpSyntaxException("Extra character at end of pattren.");
- } else {
- return var2;
- }
- }
-
- private void nextToken() throws RegExpSyntaxException {
- if (this.inCharClass) {
- int var1 = this.getTokenCC();
- if (var1 == 19) {
- this.inCharClass = false;
- }
- } else {
- int var2 = this.getTokenStd();
- if (var2 == 17 || var2 == 18) {
- this.inCharClass = true;
- }
- }
-
- }
-
- public RegExpParser() {
- }
-
- private RTree factor() throws RegExpSyntaxException {
- RTree var1 = this.primary();
- if (this.currentToken == 13) {
- var1 = new RTree(4, var1, (RTree)null);
- this.nextToken();
- } else if (this.currentToken == 14) {
- var1 = new RTree(2, var1, new RTree(4, var1, (RTree)null));
- this.nextToken();
- } else if (this.currentToken == 15) {
- var1 = new RTree(3, var1, new RTree(0, (RTree)null, (RTree)null));
- this.nextToken();
- }
-
- return var1;
- }
-
- private String processEscape(String var1) {
- String var6 = "";
-
- for(int var2 = 0; var2 < var1.length(); ++var2) {
- char var3;
- if ((var3 = var1.charAt(var2)) == '\\') {
- ++var2;
- String var5 = var1.substring(var2, var2 + 1);
- String var4 = (String)escapeTable.get(var5);
- if (var4 == null) {
- var6 = var6 + "\\" + var5;
- } else {
- var6 = var6 + var4;
- }
- } else {
- var6 = var6 + var3;
- }
- }
-
- return var6;
- }
-
- private int getTokenStd() throws RegExpSyntaxException {
- if (this.pstr == this.strbuff.length()) {
- this.currentToken = 0;
- } else {
- char var1 = this.strbuff.charAt(this.pstr++);
- switch (var1) {
- case '$':
- this.currentToken = 21;
- this.tokenChars = new Chars(var1);
- break;
- case '(':
- this.currentToken = 11;
- break;
- case ')':
- this.currentToken = 12;
- break;
- case '*':
- this.currentToken = 13;
- break;
- case '+':
- this.currentToken = 14;
- break;
- case '.':
- this.currentToken = 16;
- break;
- case '?':
- this.currentToken = 15;
- break;
- case '[':
- if (this.pstr == this.strbuff.length()) {
- throw new RegExpSyntaxException("\"]\" is expected.");
- }
-
- var1 = this.strbuff.charAt(this.pstr++);
- if (var1 == '^') {
- this.currentToken = 18;
- } else {
- this.pstr += -1;
- this.currentToken = 17;
- }
- break;
- case '\\':
- if (this.pstr == this.strbuff.length()) {
- throw new RegExpSyntaxException("Invalid escape character.");
- }
-
- this.currentToken = 1;
- this.tokenChars = new Chars(this.strbuff.charAt(this.pstr++));
- break;
- case '^':
- this.currentToken = 20;
- this.tokenChars = new Chars(var1);
- break;
- case '|':
- this.currentToken = 10;
- break;
- default:
- this.currentToken = 1;
- this.tokenChars = new Chars(var1);
- }
- }
-
- return this.currentToken;
- }
-
- static {
- escapeTable.put("0", "\u0000");
- escapeTable.put("b", "\b");
- escapeTable.put("t", "\t");
- escapeTable.put("r", "\r");
- escapeTable.put("n", "\n");
- escapeTable.put("d", "[0-9]");
- escapeTable.put("D", "[^0-9]");
- escapeTable.put("s", "[ \t\r\n]");
- escapeTable.put("S", "[^ \t\r\n]");
- escapeTable.put("w", "[0-9A-Z_a-z]");
- escapeTable.put("W", "[^0-9A-Z_a-z]");
- }
-
- private RTree primary() throws RegExpSyntaxException {
- RTree var1;
- switch (this.currentToken) {
- case 1:
- var1 = new RTree(this.tokenChars);
- this.nextToken();
- break;
- case 11:
- this.nextToken();
- var1 = this.regexp();
- if (this.currentToken != 12) {
- throw new RegExpSyntaxException("\")\" is expected.");
- }
-
- this.nextToken();
- break;
- case 16:
- var1 = new RTree(new Chars('\u0000', '\uffff'));
- this.nextToken();
- break;
- case 17:
- var1 = this.charClass();
- if (this.currentToken != 19) {
- throw new RegExpSyntaxException("\"]\" is expected.");
- }
-
- this.nextToken();
- break;
- case 18:
- var1 = this.negativeCharClass();
- if (this.currentToken != 19) {
- throw new RegExpSyntaxException("\"]\" is expected.");
- }
-
- this.nextToken();
- break;
- case 20:
- var1 = new RTree(5, this.tokenChars, (RTree)null, (RTree)null);
- this.nextToken();
- break;
- case 21:
- var1 = new RTree(6, this.tokenChars, (RTree)null, (RTree)null);
- this.nextToken();
- break;
- default:
- throw new RegExpSyntaxException("Normal character is expected.");
- }
-
- return var1;
- }
-
- private RTree regexp() throws RegExpSyntaxException {
- RTree var1;
- for(var1 = this.term(); this.currentToken == 10; var1 = new RTree(3, var1, this.term())) {
- this.nextToken();
- }
-
- return var1;
- }
- }
-